#include "opencl_source_map.hpp" 
namespace MNN {
#ifndef MNN_OPENCL_BUFFER_CLOSED
// OpenCL C source text for the `scale_buf` kernel, kept as one C string that
// is assembled from adjacent string literals (one literal per kernel line).
//
// What the kernel text does, per work-item (x = global id 0, y = global id 1):
//   * returns immediately when x >= global_size_dim0 or y >= global_size_dim1;
//   * splits y into out_c_idx = y % channelBlock and out_b_idx = y / channelBlock;
//   * reads 4 elements of `input` at
//       offset = ((out_b_idx + out_c_idx * batch) * inside + x) * 4,
//     and the 4 `scale` elements selected by vload4(out_c_idx, scale);
//   * writes input * scale (plus the matching 4 `bias` elements when BIAS is
//     defined) to `output` at the same offset.
//
// FLOAT, COMPUTE_FLOAT4, CONVERT_COMPUTE_FLOAT4 and CONVERT_FLOAT4 are not
// defined anywhere in this string; whoever compiles the program has to supply
// them (presumably via build options -- confirm against the caller).
//
// NOTE(review): this file looks machine-generated from an OpenCL source file;
// confirm before hand-editing, since every byte below is fed to the OpenCL
// compiler at runtime.
const char* scale_buf =
    // Enable the half-precision extension only when the build asks for it.
    "#ifdef MNN_SUPPORT_FP16\n"
    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
    "#endif\n"

    // Helper macros: the two leading size arguments and the out-of-range guard.
    "#define GLOBAL_SIZE_2_DIMS  __private const int global_size_dim0,__private const int global_size_dim1,\n"
    "#define DEAL_NON_UNIFORM_DIM2(input1, input2)  if (input1 >= global_size_dim0 || input2 >= global_size_dim1) {  return;  }\n"

    // Kernel signature; the `bias` argument exists only when BIAS is defined.
    "__kernel void scale_buf(GLOBAL_SIZE_2_DIMS\n"
    " __global const FLOAT* input,\n"
    " __global const FLOAT* scale,\n"
    "#ifdef BIAS\n"
    " __global const FLOAT* bias,\n"
    "#endif\n"
    " __global FLOAT* output,\n"
    " __private const int channelBlock,\n"
    " __private const int batch,\n"
    " __private const int inside) {\n"

    // Work-item coordinates and the bounds check.
    " const int x=get_global_id(0); // inside(width*height)\n"
    " const int y=get_global_id(1); // channelBlock*batch\n"
    " \n"
    " DEAL_NON_UNIFORM_DIM2(x,y);\n"

    // Decompose y into channel-block / batch indices and compute the element offset.
    " const int out_c_idx=y % channelBlock;\n"
    " const int out_b_idx=y/channelBlock;\n"
    " const int offset=((out_b_idx+out_c_idx*batch)*inside+x)*4;\n"

    // Load 4 input values and the 4 scale values for this channel block.
    " COMPUTE_FLOAT4 in_value=CONVERT_COMPUTE_FLOAT4(vload4(0,input+offset));\n"
    " COMPUTE_FLOAT4 scale_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,scale));\n"

    // Multiply by scale, adding bias only in the BIAS variant.
    " #ifdef BIAS\n"
    " COMPUTE_FLOAT4 bias_value=CONVERT_COMPUTE_FLOAT4(vload4(out_c_idx,bias));\n"
    " COMPUTE_FLOAT4 out_value=in_value*scale_value+bias_value;\n"
    " #else\n"
    " COMPUTE_FLOAT4 out_value=in_value*scale_value;\n"
    " #endif\n"

    // Store the 4 results at the same offset the input was read from.
    " vstore4(CONVERT_FLOAT4(out_value),0,output+offset);\n"
    "}\n";
#endif  // MNN_OPENCL_BUFFER_CLOSED
}  // namespace MNN
